library("rstan")
library("Amelia")

rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

rm(list=ls())

setwd("/home/andrei/Desktop/Dropbox/rolling elections/Bayes")
bulk <- read.csv("mydata.csv")
bulk <- bulk[c("hlvote","thedate","ststart","stend","state","partymark",
"medianhha","newstv","newspaper","illiterate","graduate","enrel","enlang","scres","stres","year","days")] 
nrow(bulk)
bulk <- bulk[rowSums(is.na(bulk[c("hlvote")]))==0,] 
nrow(bulk)

## main variable
hlvote <- log(bulk[["hlvote"]]/(1-bulk[["hlvote"]]))
days <- bulk[["days"]]
group <- as.numeric(factor(paste(bulk[["state"]],bulk[["year"]],sep="",collapse=NULL))) 
year <- as.numeric(factor(bulk[["year"]]))
state <- as.numeric(factor(bulk[["state"]]))
ivlist <- c("partymark","medianhha","newstv","newspaper","illiterate","graduate","enrel","enlang","scres","stres")

## imputations 
xvar <- as.matrix(bulk[ivlist])
for (i in 1:ncol(xvar)){
xvar[,i] <- (xvar[,i]-mean(xvar[,i],na.rm=TRUE))/sd(xvar[,i],na.rm=TRUE)
}
 
alldata <- cbind(xvar,hlvote)
imputed <- amelia(alldata,m=10)

imputations <- as.list(1:10)
mydata <- as.list(1:10)
for (k in 1:length(imputations)) {
imputations[[k]] <- cbind(imputed[["imputations"]][[k]][,ivlist])
mydata[[k]] <- list(y=hlvote, z=days, x=imputations[[k]], N=length(hlvote), K=ncol(imputations[[k]]), group=group, numGroups=max(group))
}

## define the model

stanmodelcode <- "
data {
int<lower=0> N; // number of observations 
int<lower=0> numGroups; // number of groups
int group[N];
vector[N] y; //  dependent variable
vector[N] z; // independent variables 
int<lower=0> K; // number of independent variables
matrix[N,K] x; // independent variables
}
parameters {
vector[2] malpha;
cov_matrix[2] talpha;
vector[2] alpha[numGroups];
real<lower=0> sigma;
vector[K] gamma;
}
model {
alpha ~ multi_normal(malpha,talpha);
for (i in 1:N) 
  y[i] ~ normal(alpha[group[i]][1] + z[i]*alpha[group[i]][2] + x[i,]*gamma,sqrt(sigma));
}
" 

## fit
fit <- stan(model_code = stanmodelcode, data = mydata[[1]], iter = 50, chains = 1)

setwd("/home/andrei/Desktop/rolling election rewrite/chains HLVOTE")

save(fit,file="init.RData")

outfit1 <-  stan(fit=fit, data = mydata[[1]], iter = 5000, verbose=FALSE)
fitcheck <- summary(outfit1)
save(outfit1,file="outfit1.RData")
rm(outfit1)
outfit2 <-  stan(fit=fit, data = mydata[[2]], iter = 5000, verbose=FALSE)
save(outfit2,file="outfit2.RData")
rm(outfit2)
outfit3 <-  stan(fit=fit, data = mydata[[3]], iter = 5000, verbose=FALSE)
save(outfit3,file="outfit3.RData")
rm(outfit3)
outfit4 <-  stan(fit=fit, data = mydata[[4]], iter = 5000, verbose=FALSE)
save(outfit4,file="outfit4.RData")
rm(outfit4)
outfit5 <-  stan(fit=fit, data = mydata[[5]], iter = 5000, verbose=FALSE)
save(outfit5,file="outfit5.RData")
rm(outfit5)
outfit6 <-  stan(fit=fit, data = mydata[[6]], iter = 5000, verbose=FALSE)
save(outfit6,file="outfit6.RData")
rm(outfit6)
outfit7 <-  stan(fit=fit, data = mydata[[7]], iter = 5000, verbose=FALSE)
save(outfit7,file="outfit7.RData")
rm(outfit7)
outfit8 <-  stan(fit=fit, data = mydata[[8]], iter = 5000, verbose=FALSE)
save(outfit8,file="outfit8.RData")
rm(outfit8)
outfit9 <-  stan(fit=fit, data = mydata[[9]], iter = 5000, verbose=FALSE)
save(outfit9,file="outfit9.RData")
rm(outfit9)
outfit10 <-  stan(fit=fit, data = mydata[[10]], iter = 5000, verbose=FALSE)
save(outfit10,file="outfit10.RData")
rm(outfit10)
 

## combine chains
load("outfit1.RData")
load("outfit2.RData")
load("outfit3.RData")
load("outfit4.RData")
load("outfit5.RData")
load("outfit6.RData")
load("outfit7.RData")
load("outfit8.RData")
load("outfit9.RData")
load("outfit10.RData")

chains <- as.list(1:10) 
chains[[1]] <- extract(outfit1,permuted=FALSE)
chains[[2]] <- extract(outfit2,permuted=FALSE)
chains[[3]] <- extract(outfit3,permuted=FALSE)
chains[[4]] <- extract(outfit4,permuted=FALSE) 
chains[[5]] <- extract(outfit5,permuted=FALSE)
chains[[6]] <- extract(outfit6,permuted=FALSE) 
chains[[7]] <- extract(outfit7,permuted=FALSE)
chains[[8]] <- extract(outfit8,permuted=FALSE) 
chains[[9]] <- extract(outfit9,permuted=FALSE)
chains[[10]] <- extract(outfit10,permuted=FALSE) 

checks <- as.list(1:10)
names <- paste("outfit",1:10,sep="")
for (j in 1:10) {
vec <- summary(get(names[j]))$summary[,"Rhat"]
checks[[j]] <- (vec <=1.1 &  vec >= 0.9)
}
checks.complete <- do.call(cbind,checks)
checks.complete

for (j in 1:10) { 
kappa <- ncol(chains[[j]])
temp <- as.list(1:kappa)
for (f in 1:kappa) {
  temp[[f]] <- chains[[j]][,f,]
  }
tempo <- do.call(rbind,temp)
index <- sample(1:nrow(tempo),2000)
chains[[j]] <- tempo[index,]
}
tempo <- do.call(rbind,chains)
index <- sample(1:nrow(tempo),5000)
combined.sample <- tempo[index,]
## extract alpha, beta, and gamma separately
alpha <- combined.sample[,paste("alpha[",1:max(group),",1]",sep="",collapse=NULL)]
beta <- combined.sample[,paste("alpha[",1:max(group),",2]",sep="",collapse=NULL)]
gamma <- combined.sample[,paste("gamma[",1:length(ivlist),"]",sep="",collapse=NULL)]

colnames(alpha) <- toupper(sort(unique(factor(paste(bulk[["state"]],bulk[["year"]],sep=" ",collapse=NULL)))))
colnames(beta) <- toupper(sort(unique(factor(paste(bulk[["state"]],bulk[["year"]],sep=" ",collapse=NULL)))))
colnames(gamma) <- ivlist

save(list=c("alpha","beta","gamma"),file="HLVOTE.Estimates.RData")

## summary table
load("HLVOTE.Estimates.RData")
delta <- cbind(malpha=rowMeans(alpha),mbeta=rowMeans(beta),gamma)
sums <- matrix(NA,ncol(delta),3)
for (j in 1:ncol(delta)) {
  sums[j,1] <- mean(delta[,j])
  sums[j,2:3] <- quantile(delta[,j],prob=c(0.025,0.975))
}
rownames(sums) <- colnames(delta)
write.csv(sums,file="Summary HLVOTE.csv")

sums.alpha  <- matrix(NA,ncol(alpha),3)
for (j in 1:ncol(alpha)) {
  sums.alpha[j,1] <- mean(alpha[,j])
  sums.alpha[j,2:3] <- quantile(alpha[,j],prob=c(0.025,0.975))
}
rownames(sums.alpha) <- colnames(alpha)
write.csv(sums.alpha,file="Summary alpha HLVOTE.csv")

sums.beta  <- matrix(NA,ncol(beta),3)
for (j in 1:ncol(beta)) {
  sums.beta[j,1] <- mean(beta[,j])
  sums.beta[j,2:3] <- quantile(beta[,j],prob=c(0.025,0.975))
}
rownames(sums.beta) <- colnames(beta)
write.csv(sums.beta,file="Summary beta HLVOTE.csv")


## predictions by state-year
# the independent variables were centered around zero before estimation
pred <- as.list(1:max(group))
for (i in 1:max(group)) {
  pred[[i]] <- cbind(days=as.vector(0:41),matrix(NA,42,3,dimnames=list(NULL,c("xb","lb","ub"))))
  for (j in 1:(1+max(days.data[[i]]))) {
    x <- j-1
    xb <- exp(alpha[,i] + x*beta[,i])/(1+exp(alpha[,i] + x*beta[,i]))
    pred[[i]][j,2] <- mean(xb)
    pred[[i]][j,3:4] <- quantile(xb, prob=c(0.025,0.975))
  }
}

setEPS(width=5,height=8)
postscript("hlvote.eps")
par(mfrow=c(8,5))
par(mar = c(1.5, 1.2,1.2,0.7))
for (i in 1:length(labs)) {
  plot(y=pred[[i]][,"xb"],x=pred[[i]][,"days"],type="l",col="white",ylab="Hopeless Vote", xlab="Days from Phase I in state",cex.lab=0.5 ,cex.axis=0.5, cex.main=0.5,
       xlim=c(-2,42),ylim=c(0,0.7), main=labs[i], tck = -0.02, mgp=c(0.6,0,0))
  lines(y=pred[[i]][,"xb"],x=pred[[i]][,"days"])
  lines(y=pred[[i]][,"lb"],x=pred[[i]][,"days"],lty=2)
  lines(y=pred[[i]][,"ub"],x=pred[[i]][,"days"],lty=2)
  par(new=TRUE)
  hist(days.data[[i]], axes=FALSE, xlab=NULL,ylab=NULL,main=NULL, xlim=c(-2,42), border="gray20") 
}
dev.off()